


/*
Filename: B3_MergeEDGAR.do
Goal: Merge institutional holdings data with EDGAR access activity and build the regression dataset (RegEDGAR.dta) used to analyze the relationship between information acquisition and bond holdings.
Contact: mjha@gsu.edu (author) and gormley@wustl.edu

EDGAR Activity and Holdings Regression Analysis
Project Overview
Objective: Merge institutional holdings data with EDGAR access activity to analyze the relationship between information acquisition and investment decisions, then conduct regression analysis.
Principal Contacts:

Primary: mjha@gsu.edu (Lead Author)
Secondary: gormley@wustl.edu


Data Sources
1. Institutional Holdings (Generated)
File: HoldingFromCRSP_CIK.dta
Source: Created by code in A1_CreateHolding
Description: Institutional equity and bond holdings linked to SEC CIK identifiers
Purpose: Measure institutional ownership stakes and portfolio characteristics

2. EDGAR Access Activity (Generated)
File: InstitutionIPLogCounts.csv
Source: Created by code in B2_DownloadEDGAR
Description: Institutional access counts from EDGAR log files
Purpose: Capture institutional information acquisition through SEC filing access

3. Meeting Details - General (Generated)
File: MeetingDetails.dta
Source: Created by code in B1_MeetingDetails
Description: Shareholder meeting characteristics and proposal details
Purpose: Control for meeting-level characteristics in governance analysis

4. Meeting Details - Say on Pay (Generated)
File: MeetingDetails_saypay.dta
Source: Created by code in B1_MeetingDetails
Description: Subset focused on say-on-pay proposals
Purpose: Enable targeted analysis of compensation-related information acquisition

5. Default Probability (Generated)
File: edf_cik.dta
Source: Created by code in A5_DefaultProb
Description: Expected default frequency measures linked to CIK identifiers
Purpose: Control for firm credit risk and financial distress
*/


* ---- Session reset ----
* Close any open log and restore any preserved dataset; both commands are
* wrapped in capture so the script continues when there is nothing to
* close or restore. Then turn off output paging and clear memory.
cap log close
cap restore
set more off
clear all

* ---- Merge-key names ----
* Variable names used as merge keys and grouping variables below:
* the time period, the institution and the firm identifier.
global time        "month"
global institution "parent_name"
global firm        "cik"



**************************************** Merge Data
* Build three shifted copies of the holdings file. Every pass adds one more
* unit to $time before saving, so copies A, B and C store each holdings
* record under a $time value that is 1, 2 and 3 periods later than the
* original record.
use "HoldingFromCRSP_CIK.dta", clear
foreach copy in A B C {
	replace $time = $time + 1
	save "Holding`copy'_CIK.dta", replace
}

**** IP Activity
* Load the EDGAR access counts. The raw meetingdate column is kept as mdate
* and re-created as a Stata daily date (parsed as year-month-day); the
* level column becomes parent_name.
import delimited "InstitutionIPLogCounts.csv", clear
rename (meetingdate level) (mdate parent_name)
gen meetingdate = date(mdate, "YMD")
format %td meetingdate
gen month = mofd(meetingdate)

// add info on meeting (meeting details from WRDS)
* Keep every row of the access data; meetings that appear only in
* MeetingDetails.dta are not retained and no _merge variable is kept.
merge m:1 $firm meetingdate using "MeetingDetails.dta", keep(master match) nogenerate

* Attach holdings from the three shifted copies, giving priority to copy A,
* then B, then C. Only the tna* and mv* variables are coalesced across copies.

* Step 1: copy A. Park its tna*/mv* variables under a temp_ prefix so the
* same names can be brought in again from the next copies.
merge m:1 $institution $time $firm using "HoldingA_CIK.dta", keep(master match) nogenerate
foreach hv of varlist tna* mv* {
	rename `hv' temp_`hv'
}

* Step 2: copies B and C. Fill a temp_ value only where it is still missing,
* then discard the freshly merged column.
foreach lag in B C {
	merge m:1 $institution $time $firm using "Holding`lag'_CIK.dta", keep(master match) nogenerate
	foreach hv of varlist tna* mv* {
		replace temp_`hv' = `hv' if missing(temp_`hv')
		drop `hv'
	}
}

* Step 3: strip the 5-character "temp_" prefix to restore the original names.
foreach tv of varlist temp_tna* temp_mv* {
	local base = substr("`tv'", 6, .)
	rename `tv' `base'
}

* Rows with no tna after the three holdings merges are dropped.
drop if missing(tna)

* Stata orders missing values above every number, so a bare "x > k" is true
* when x is missing. Each comparison below is therefore paired with
* !missing() so that missing values are never treated as large values.
gen year = yofd(meetingdate)
keep if year > 2007 & !missing(year)

***** variables for regression
* Indicator for a positive EDGAR access count (bcount). It is left missing,
* rather than set to 1, when bcount itself is missing.
gen edgardummy = (bcount > 0) if !missing(bcount)

* Bond share of the position, and position values scaled by tna.
gen bondper = mv_bond/mv_total
gen double inv_total = mv_total/tna
gen double inv_bond = mv_bond/tna
gen double inv_equity = (mv_total - mv_bond)/tna
* Keep strictly positive non-bond positions; a missing ratio (missing
* mv_total/mv_bond or tna equal to zero) does not qualify.
keep if inv_equity > 0 & !missing(inv_equity)

* winsor + standardize
* Expand inv* once, before any std_* variable exists. For each variable,
* winsorize in place at the 1st/99th percentiles and then create its
* standardized version std_<name> from the winsorized values.
unab invvars : inv*
foreach v of local invvars {
	winsor2 `v', cuts(1 99) replace
	egen double std_`v' = std(`v')
}

** heterogeneity based on say on pay meeting
* Keyed on $firm (not a hard-coded cik) so this merge follows the same firm
* identifier as every other merge in this file. Rows found only in the
* using file are not retained and no _merge variable is kept.
merge m:1 $firm meetingdate using "MeetingDetails_saypay.dta", keep(master match) nogenerate

* Attach the firm-year variables from edf_cik.dta.
merge m:1 $firm year using "edf_cik.dta", keep(master match) nogenerate

* Group identifiers: one per institution-period pair and one per
* firm-meeting pair.
egen institution_month = group($institution $time)
egen meetingid = group($firm meetingdate)
save "RegEDGAR.dta", replace

